

library(rio)
library(ggplot2)
library(texreg)
library(tidyverse)
library(AER)
library(GGally)

##set the working directory for the replication archive
working_dir <- "~/Dropbox/dueling project/2nd_paper/replication archive/"

##County Data
counts_county <- readRDS(paste0(working_dir,"Data/PostOfficesByDecade_county.Rds"))

##generate lags
counts_county <- 
  counts_county %>%
  arrange(state, county, decade) %>%
  group_by(state, county) %>%
  mutate(numPost_lag = lag(numPost), numPost_lag2 = lag(numPost, 2), numPost_lag3 = lag(numPost, 3)) %>%
  ungroup()

##generate correlations
counts_county_corr <-
  counts_county %>%
  filter(decade >= 1840) %>%
  group_by(decade) %>%
  summarise(corr_1lag = cor(numPost_lag, numPost, use = "pairwise"),
            corr_2lag = cor(numPost_lag2, numPost, use = "pairwise"),
            corr_3lag = cor(numPost_lag3, numPost, use = "pairwise"))

##make a plot
one_lag <- 
  ggplot(counts_county %>% filter(decade >= 1840), aes(numPost_lag, numPost)) +
  geom_point() + 
  geom_text(data=counts_county_corr, aes(label = paste("corr.=", round(corr_1lag, digits = 2), sep="")), 
            x=seq(40, 120, length.out = 6), y=10) +
  xlab("Number of Post Offices (lagged one decade)") +
  ylab("Number of Post Offices (this decade)") +
  facet_wrap( ~ decade, scales = "free") +
  theme_bw()


two_lags <- 
  ggplot(counts_county %>% filter(decade >= 1840), aes(numPost_lag2, numPost)) +
  geom_point() + 
  geom_text(data=counts_county_corr, aes(label = paste("corr.=", round(corr_2lag, digits = 2), sep="")), 
            x=seq(40, 100, length.out = 6), y=10) +
  xlab("Number of Post Offices (lagged two decades)") +
  ylab("Number of Post Offices (this decade)") +
  facet_wrap( ~ decade, scales = "free") +
  theme_bw()

three_lags <- 
  ggplot(counts_county %>% filter(decade >= 1840), aes(numPost_lag3, numPost)) +
  geom_point() + 
  geom_text(data=counts_county_corr, aes(label = paste("corr.=", round(corr_3lag, digits = 2), sep="")), 
            x=seq(20, 80, length.out = 6), y=10) +
  xlab("Number of Post Offices (lagged three decades)") +
  ylab("Number of Post Offices (this decade)") +
  facet_wrap( ~ decade, scales = "free") +
  theme_bw()

ggsave(one_lag, 
       file = paste0(working_dir,"Replication Plots/POlag1Dec.pdf"), 
       width = 12, height = 4)

ggsave(two_lags, 
       file = paste0(working_dir,"Replication Plots/POlag2Dec.pdf"), 
       width = 12, height = 4)

ggsave(three_lags, 
       file = paste0(working_dir,"Replication Plots/POlag3Dec.pdf"), 
       width = 12, height = 4)

##now we will make a heatmap version
spread_mat <-
  as.data.frame(spread(counts_county %>% 
                         select(-state_po) %>% 
                         ungroup() %>% 
                         group_by(state, county) %>% 
                         mutate(ID = group_indices()) %>%
                         ungroup() %>%
                         select(-state,-county) %>%
                         mutate(decade = paste0(decade)), 
                       key = decade, value = numPost))

cormat <- round(cor(spread_mat[,-1]),2)

mat_plot <-
  ggpairs(spread_mat, columns = 2:ncol(spread_mat), title = "")

cor_plot <-
  ggcorr(spread_mat[,-1], columns = 2:ncol(spread_mat), limits = c(0.4,1), midpoint = 0.75)


my_fn <- function(data, mapping, method="p", use="pairwise", ...){
  
  # grab data
  x <- eval_data_col(data, mapping$x)
  y <- eval_data_col(data, mapping$y)
  
  # calculate correlation
  corr <- cor(x, y, method=method, use=use)
  
  # calculate colour based on correlation value
  # Here I have set a correlation of minus one to blue, 
  # zero to white, and one to red 
  # Change this to suit: possibly extend to add as an argument of `my_fn`
  colFn <- colorRampPalette(c("blue", "white", "red"), interpolate ='spline')
  fill <- colFn(100)[findInterval(corr, seq(0.4, 1, length=100))]
  
  ggally_cor(data = data, color = "black", mapping = mapping, ...) + 
    theme_void() +
    theme(panel.background = element_rect(fill=fill))
}

my_fn_diag <- function(data, mapping, ...){
  ggally_diagAxis(data, mapping, label = mapping$x, 
                  labelSize = 5, labelXPercent = 0.5,
                  labelYPercent = 0.55, labelHJust = 0.5, 
                  labelVJust = 0.5, gridLabelSize = 4, ...)
  
}

ggplot2::theme_set(ggplot2::theme_bw()) 
mat_plot <-
  ggpairs(spread_mat, columns = 2:ncol(spread_mat), title = "", 
          upper = list(continuous = my_fn), axisLabels = "internal")

ggsave(mat_plot, 
       file = paste0(working_dir,"Replication Plots/POcorPlot.pdf"), 
       width = 12, height = 12)
